## Load generic libraries
source('configuration.r')
## Load plot-specific libraries
suppressMessages(library(igraph))
library(ggraph)
library(foreach)
## Cluster the Mash distance matrix for plasmids
## Read the pairwise plasmid distance table, build a single-linkage
## hierarchical clustering from it and cut the tree at height 0.02 to assign
## each plasmid (row name of the matrix) to a cluster id.
p.dist <- read.table('../tables/plasmid_mash_dist.dat')
clustering <- hclust(as.dist(p.dist), method = 'single')
clusters <- data.frame(clusters = cutree(clustering, h = 0.02)) %>%
  rownames_to_column('plasmid_ID')

## Per-plasmid annotation table, keyed by plasmid_ID.
mapping.p <- read.table('../tables/plasmid_info.dat', stringsAsFactors = FALSE,
                        sep = '\t', header = TRUE)

## merge() keeps only plasmid_IDs present in both tables; per the original
## note, this removes duplicated libraries that are present in the matrix.
plasmid.dat <- merge(clusters, mapping.p, by = 'plasmid_ID')

## Write the merged file for verification of the clusters using annotation.
## NOTE(review): the file name says "hclust0.05" while the tree is cut at
## h = 0.02 above -- confirm which value is intended. The path is left
## unchanged because other scripts may read it.
write.table(plasmid.dat, '../output_tables/plasmid_info.hclust0.05.dat',
            sep = '\t', quote = FALSE, row.names = FALSE, col.names = TRUE)
## Graph of plasmid clusters and AR genes
## Build the edge list between plasmid clusters and AR genes.
## Each row of links.p links a plasmid_ID to an AR_gene.
links.p <- read.table('../tables/antibiotics_gene_linkage.plasmid.tsv',
                      stringsAsFactors = FALSE, header = TRUE)

## Count, per (cluster, gene) pair, the number of linkage rows.
## The ad hoc gene-name corrections must overwrite `ar` itself: writing them
## to a separate column discards them, because only `ar` is carried forward
## by the select() below. They are applied after counting, mirroring the
## species section of this script.
## NOTE(review): rows are not de-duplicated per plasmid before counting, so a
## gene listed more than once for one plasmid is counted more than once --
## confirm this is intended.
pcluster.ar.links <- merge(links.p, plasmid.dat, by = "plasmid_ID") %>%
  select(plasmid_ID, clusters, ar = AR_gene) %>%
  count(clusters, ar) %>%
  mutate(ar = str_replace(ar, 'PheCmlA5', 'Phe')) %>% ## ad hoc correction
  mutate(ar = str_replace(ar, 'Far1_Fcd', 'Far1_Bla'))

## Attach the cluster size (number of plasmids per cluster in plasmid.dat) and
## turn the counts into a score = count / cluster size.
pcluster.ar.links <- count(plasmid.dat, clusters, name = "cluster.size") %>%
  merge(pcluster.ar.links, by = "clusters") %>%
  mutate(score = n / cluster.size) %>%
  select(clusters, ar, score, cluster.size) %>%
  filter(cluster.size > 1) %>% ## remove edges connecting to cluster with size 1
  mutate(clusters = str_c('#', clusters)) ## '#' prefix marks cluster nodes
## Undirected graph whose edges are the rows of pcluster.ar.links
## (first column: cluster node, second column: AR gene node).
g <- graph_from_data_frame(pcluster.ar.links, directed = FALSE)

## Cluster nodes are recognised by the '#' prefix added to their names.
V(g)$type <- str_detect(V(g)$name, '^#')

## Default node size is 10; cluster nodes get their cluster size instead.
## Sizes are looked up by vertex name so the result does not depend on the
## vertex order matching the row order of the edge table.
V(g)$size <- 10
cluster.size.by.name <- with(
  unique(select(pcluster.ar.links, clusters, cluster.size)),
  setNames(cluster.size, clusters)
)
V(g)$size[V(g)$type] <- unname(cluster.size.by.name[V(g)$name[V(g)$type]])

## Append the cluster size to the label of each cluster node, e.g. "#3(12)".
V(g)$name[V(g)$type] <- paste0(V(g)$name[V(g)$type], "(", V(g)$size[V(g)$type], ")")

## Edge weight is the (cluster, gene) score; edges follow the row order of
## the edge table.
E(g)$weight <- pcluster.ar.links$score
## Draw the plasmid cluster / AR gene graph and save it as a PDF.
## Edge width encodes the score; edges with score >= 1 are drawn in red, all
## others in black. The colour values are named so that the mapping stays
## correct even when only one of the two levels (TRUE/FALSE) is present.
ggraph(g, layout = 'fr') +
  geom_edge_arc(aes(width = weight, colour = weight >= 1), alpha = 0.4,
                curvature = 0.05,
                end_cap = circle(4, 'mm'), start_cap = circle(4, 'mm')) +
  geom_node_point(aes(shape = type, size = size, color = type)) +
  geom_node_text(aes(label = name), size = 5, repel = TRUE, fontface = 'bold') +
  scale_edge_color_manual(values = c('FALSE' = 'black', 'TRUE' = 'red')) +
  scale_edge_width_continuous(range = c(0.1, 2)) +
  scale_radius(range = c(8, 20)) +
  scale_color_manual(values = pal_npg("nrc")(10)[2:3]) +
  theme_void() +
  scale_shape_manual(values = c(18, 19))
## ggsave() without a `plot` argument saves the most recent plot, i.e. the
## one built above.
ggsave("../plots/fig3_ar_gene_graph_plasmid.pdf", height = 15, width = 25)
## Genome data
## Genome table reduced to two columns: the species name (renamed to
## `clusters`, so species play the role of clusters below) and Nanopore_ID.
genome.dat <- read.table("../tables/genome_info.dat", header = TRUE, sep = '\t') %>%
  select(clusters = Species_name, Nanopore_ID)
## Graph of species and AR genes
## Build the edge list between species and AR genes.
links.s <- read.table('../tables/antibiotics_gene_linkage.species.tsv',
                      stringsAsFactors = FALSE, header = TRUE)

## focus only on high/medium quality genomes
## The merge is positional: the first two columns of links.s are matched
## against the first two columns of genome.dat (clusters, Nanopore_ID).
## NOTE(review): presumably those links.s columns are `species` and `sample`
## -- confirm against the input file.
links.s <- merge(links.s, genome.dat, by = c(1, 2))

## Count, per (species, gene) pair, the number of samples carrying the gene.
pcluster.ar.links <- count(links.s, species, sample, AR_gene) %>% ## de-duplicate multiple copies
  select(clusters = species, ar = AR_gene) %>%
  count(clusters, ar) %>%
  mutate(ar = str_replace(ar, 'PheCmlA5', 'Phe')) %>% ## ad hoc correction
  mutate(ar = str_replace(ar, 'Far1_Fcd', 'Far1_Bla'))

## Attach the number of genomes per species and turn the counts into a
## score = count / number of genomes of that species.
pcluster.ar.links <- count(genome.dat, clusters, name = "cluster.size") %>%
  merge(pcluster.ar.links, by = "clusters") %>%
  mutate(score = n / cluster.size) %>%
  select(clusters, ar, score, cluster.size) %>%
  filter(cluster.size > 1) %>% ## remove edges connecting to cluster with size 1
  mutate(clusters = str_replace(clusters, '_', ' ')) ## first '_' only
## Undirected graph whose edges are the rows of pcluster.ar.links
## (first column: species node, second column: AR gene node).
g <- graph_from_data_frame(pcluster.ar.links, directed = FALSE)

## Species nodes are those whose name occurs in the `clusters` column.
V(g)$type <- V(g)$name %in% unique(pcluster.ar.links$clusters)

## Default node size is 10; species nodes get their genome count instead.
## Sizes are looked up by vertex name so the result does not depend on the
## vertex order matching the row order of the edge table.
V(g)$size <- 10
cluster.size.by.name <- with(
  unique(select(pcluster.ar.links, clusters, cluster.size)),
  setNames(cluster.size, clusters)
)
V(g)$size[V(g)$type] <- unname(cluster.size.by.name[V(g)$name[V(g)$type]])

## Append the genome count to the label of each species node.
V(g)$name[V(g)$type] <- paste0(V(g)$name[V(g)$type], "(", V(g)$size[V(g)$type], ")")

## Edge weight is the (species, gene) score; edges follow the row order of
## the edge table.
E(g)$weight <- pcluster.ar.links$score
## Draw the species / AR gene graph and save it as a PDF.
## Edge width encodes the score; edges with score >= 0.8 are drawn in red,
## all others in black. The colour values are named so that the mapping stays
## correct even when only one of the two levels (TRUE/FALSE) is present.
ggraph(g, layout = 'fr') +
  geom_edge_arc(aes(width = weight, colour = weight >= 0.8), alpha = 0.4,
                curvature = 0.05,
                end_cap = circle(4, 'mm'), start_cap = circle(4, 'mm')) +
  geom_node_point(aes(shape = type, size = size, color = type)) +
  geom_node_text(aes(label = name), size = 5, repel = TRUE, fontface = 'bold') +
  scale_edge_color_manual(values = c('FALSE' = 'black', 'TRUE' = 'red')) +
  scale_edge_width_continuous(range = c(0.1, 2)) +
  scale_radius(range = c(8, 20)) +
  scale_color_manual(values = pal_npg("nrc")(10)[2:3]) +
  theme_void() +
  scale_shape_manual(values = c(18, 19))
## ggsave() without a `plot` argument saves the most recent plot, i.e. the
## one built above.
ggsave("../plots/fig3_ar_gene_graph_genome.pdf", height = 25, width = 25)
## Print the R version, platform, locale and attached/loaded package versions
## of this session; the output of one run is recorded in the comments below.
sessionInfo()
## R version 3.4.4 (2018-03-15)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: CentOS release 6.9 (Final)
##
## Matrix products: default
## BLAS: /usr/lib64/R/lib/libRblas.so
## LAPACK: /usr/lib64/R/lib/libRlapack.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] foreach_1.4.4 ggraph_1.0.2 igraph_1.2.2 ggsci_2.9
## [5] reshape2_1.4.3 stringr_1.3.0 tibble_2.0.1 tidyr_0.8.3
## [9] dplyr_0.8.0.1 gridExtra_2.3 ggplot2_3.1.0
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.0 pillar_1.3.1 compiler_3.4.4
## [4] plyr_1.8.4 iterators_1.0.9 viridis_0.5.1
## [7] tools_3.4.4 digest_0.6.18 viridisLite_0.3.0
## [10] evaluate_0.10.1 gtable_0.2.0 pkgconfig_2.0.2
## [13] rlang_0.3.1 ggrepel_0.8.0 yaml_2.1.18
## [16] withr_2.1.2 knitr_1.20 rprojroot_1.3-2
## [19] tidyselect_0.2.5 glue_1.3.0 R6_2.4.0
## [22] rmarkdown_1.9 farver_1.0 tweenr_1.0.0
## [25] purrr_0.3.1 magrittr_1.5 codetools_0.2-15
## [28] units_0.6-1 backports_1.1.2 scales_1.0.0
## [31] htmltools_0.3.6 MASS_7.3-49 assertthat_0.2.0
## [34] ggforce_0.1.3 colorspace_1.3-2 labeling_0.3
## [37] stringi_1.3.1 lazyeval_0.2.1 munsell_0.5.0
## [40] crayon_1.3.4